# Computations
import numpy as np
import pandas as pd
# sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score, KFold
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
# Tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import re
import us
import pgeocode
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## seaborn
import seaborn as sns
# Notebook-wide seaborn theme: white grid with paper-sized fonts.
sns.set_style('whitegrid')
sns.set_context('paper', rc={'font.size':12,'axes.titlesize':14,'axes.labelsize':12})
## matplotlib
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
import matplotlib.colors
from pylab import rcParams
from matplotlib.font_manager import FontProperties
# Default figure size / font sizes for every matplotlib chart below.
plt.rcParams['figure.figsize'] = 14, 8
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
# %config InlineBackend.figure_format = 'retina'
import missingno as msno
import warnings
# Suppress library deprecation chatter in the rendered notebook.
warnings.filterwarnings("ignore")
In this article, we use the Consumer Complaint Database published by the Consumer Financial Protection Bureau.
According to the Kaggle page of the Consumer Financial Protection Bureau (CFPB), each week the CFPB sends thousands of consumers’ complaints about financial products and services to companies for response. Those complaints are published here after the company responds or after 15 days, whichever comes first. By adding their voice, consumers help improve the financial marketplace.
# Load the CFPB consumer complaints export (one row per complaint).
Data = pd.read_csv('Data/complaints.csv')
Data.head(3)
| Date received | Product | Sub-product | Issue | Sub-issue | Consumer complaint narrative | Company public response | Company | State | ZIP code | Tags | Consumer consent provided? | Submitted via | Date sent to company | Company response to consumer | Timely response? | Consumer disputed? | Complaint ID | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-09-24 | Debt collection | I do not know | Attempts to collect debt not owed | Debt is not yours | transworld systems inc. \nis trying to collect... | NaN | TRANSWORLD SYSTEMS INC | FL | 335XX | NaN | Consent provided | Web | 2019-09-24 | Closed with explanation | Yes | NaN | 3384392 |
| 1 | 2019-09-19 | Credit reporting, credit repair services, or o... | Credit reporting | Incorrect information on your report | Information belongs to someone else | NaN | Company has responded to the consumer and the ... | Experian Information Solutions Inc. | PA | 15206 | NaN | Consent not provided | Web | 2019-09-20 | Closed with non-monetary relief | Yes | NaN | 3379500 |
| 2 | 2019-10-25 | Credit reporting, credit repair services, or o... | Credit reporting | Incorrect information on your report | Information belongs to someone else | I would like to request the suppression of the... | Company has responded to the consumer and the ... | TRANSUNION INTERMEDIATE HOLDINGS, INC. | CA | 937XX | NaN | Consent provided | Web | 2019-10-25 | Closed with explanation | Yes | NaN | 3417821 |
# Daily complaint volume: count complaints per "Date received" and plot the time series.
Feat = 'Date received'
Group = (
    Data[Feat]
    .value_counts()
    .to_frame('Count')
    .sort_index()
    .rename_axis(Feat)
    .reset_index()
)
fig = px.line(Group, x=Feat, y='Count', title=f'Consumer Complaints by {Feat.title()}')
fig.update_layout(xaxis_title=Feat.title(), plot_bgcolor='WhiteSmoke')
fig.update_xaxes(rangeslider_visible=True)
fig.update_yaxes(range=[0, 4e3])
fig.show()
As can be seen, the number of complaints has been increasing overall.
# Share of complaints per product category: gradient table plus horizontal bar chart.
Feat = 'Product'
Group = (
    Data[Feat]
    .value_counts()
    .to_frame('Count')
    .sort_index()
    .rename_axis(Feat)
    .reset_index()
)
Group['Percentage'] = np.round(100 * (Group['Count'] / Group['Count'].sum()), 3)
styled = (
    Group.sort_values(by=['Percentage'], ascending=False)
    .style.background_gradient(cmap='Reds', subset=['Percentage'])
    .set_precision(4)
)
display(styled)
fig = px.bar(Group, y=Feat, x='Percentage', orientation='h', text='Count',
             title=f'Consumer Complaints by {Feat.title()}')
fig.update_traces(marker_color='orange', marker_line_color='DarkRed',
                  marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', height=700,
                  plot_bgcolor='white')
fig.update_xaxes(range=[0, 30],
                 showgrid=True, gridwidth=1, gridcolor='Lightgray',
                 showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()
| Product | Count | Percentage | |
|---|---|---|---|
| 6 | Credit reporting, credit repair services, or other personal consumer reports | 381290 | 24.2270 |
| 10 | Mortgage | 300314 | 19.0820 |
| 7 | Debt collection | 290394 | 18.4510 |
| 5 | Credit reporting | 140432 | 8.9230 |
| 3 | Credit card | 89190 | 5.6670 |
| 0 | Bank account or service | 86206 | 5.4770 |
| 4 | Credit card or prepaid card | 74134 | 4.7100 |
| 1 | Checking or savings account | 61933 | 3.9350 |
| 15 | Student loan | 57631 | 3.6620 |
| 2 | Consumer Loan | 31604 | 2.0080 |
| 16 | Vehicle loan or lease | 16766 | 1.0650 |
| 8 | Money transfer, virtual currency, or money service | 15266 | 0.9700 |
| 13 | Payday loan, title loan, or personal loan | 12886 | 0.8190 |
| 12 | Payday loan | 5543 | 0.3520 |
| 9 | Money transfers | 5354 | 0.3400 |
| 14 | Prepaid card | 3819 | 0.2430 |
| 11 | Other financial service | 1059 | 0.0670 |
| 17 | Virtual currency | 18 | 0.0010 |
# Geographic view: choropleth of complaint counts per state, then a bar chart of shares.
Feat = 'State'
Group = (
    Data[Feat]
    .value_counts()
    .to_frame('Count')
    .sort_index()
    .rename_axis(Feat)
    .reset_index()
)
fig = go.Figure(data=go.Choropleth(locations=Group[Feat],
                                   z=Group['Count'].astype(float),
                                   locationmode='USA-states',
                                   colorscale='Reds',
                                   colorbar_title='Number of<br>Consumer<br>Complaints'))
fig.update_layout(title_text=f'Consumer Complaints by {Feat.title()}', geo_scope='usa')
fig.show()
Group['Percentage'] = np.round(100 * (Group['Count'] / Group['Count'].sum()), 3)
# Expand two-letter abbreviations to full state names, then bucket the
# non-state codes (military mail, territories) under one label.
Group = Group.replace(us.states.mapping('abbr', 'name'))
non_states = ['AA', 'AE', 'AP', 'FM', 'MH', 'PW',
              'UNITED STATES MINOR OUTLYING ISLANDS']
Group.loc[Group.State.isin(non_states), 'State'] = 'Other States'
Group = Group.sort_values(by=Feat)
fig = px.bar(Group, x=Feat, y='Percentage', text='Count',
             title=f'Consumer Complaints by {Feat.title()}')
fig.update_traces(marker_color='DarkOrchid', marker_line_color='Indigo',
                  marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', height=600,
                  plot_bgcolor='white')
fig.update_yaxes(range=[0, 16],
                 showgrid=True, gridwidth=1, gridcolor='Lightgray',
                 showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()
The top ten states with the highest numbers of complaints can be found in the following table.
# Top ten states by share of total complaints (full names after the replacement above).
# NOTE(review): Styler.hide_index()/set_precision() are deprecated and removed in
# pandas >= 2.0 — use .hide(axis='index') / .format(precision=4) when upgrading.
display(Group.sort_values(by=['Percentage'], ascending = False)[:10].\
style.hide_index().background_gradient(cmap='Reds', subset=['Percentage']).set_precision(4))
| State | Count | Percentage |
|---|---|---|
| California | 212953 | 13.7640 |
| Florida | 165244 | 10.6810 |
| Texas | 134235 | 8.6760 |
| New York | 107450 | 6.9450 |
| Georgia | 84399 | 5.4550 |
| Illinois | 59893 | 3.8710 |
| New Jersey | 56847 | 3.6740 |
| Pennsylvania | 54302 | 3.5100 |
| North Carolina | 48286 | 3.1210 |
| Ohio | 45959 | 2.9710 |
# Turnaround time: distribution of days between a complaint being received
# and being forwarded to the company.
Data['Date received'] = pd.to_datetime(Data['Date received'])
Data['Date sent to company'] = pd.to_datetime(Data['Date sent to company'])
Group = (Data['Date sent to company'] - Data['Date received'])
Group = Group.value_counts().to_frame('Count').reset_index(drop=False).rename(columns={'index': 'Days'})
# Convert timedeltas to whole days; clip the few negative deltas
# (presumably data-entry artifacts) to zero.
Group['Days'] = (Group['Days'] / np.timedelta64(1, 'D')).astype(int)
Group.loc[Group['Days'] < 0, 'Days'] = 0
Group = Group.sort_values(by=['Days'])
fig = px.line(Group, x='Days', y='Count', title='Consumer Complaint Sent to Company')
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(plot_bgcolor='WhiteSmoke')
fig['layout']['yaxis'].update(range=[0, 1.2e6])
# Fix: set the x-axis title once with the correct text. The original first set it
# from the stale global `Feat` (still 'State' from an earlier cell) and then
# overwrote it, and misspelled "Complaint".
fig.update_layout(xaxis_title='Days After Complaint Received')
fig.show()
# Cross-tab of company response type x timeliness x whether the consumer disputed,
# shown as a stacked horizontal percentage bar chart.
Temp = ['Company response to consumer', 'Timely response?', 'Consumer disputed?']
Group = Data[Temp].groupby(Temp)[[Temp[0]]].agg('count').rename(columns={Temp[0]: 'Count'}).reset_index(drop=False)
# Fold the timeliness flag into a single readable bar label.
Group['Label'] = np.nan
Group.loc[Group['Timely response?'] == 'Yes', 'Label'] = Group['Company response to consumer'] + ' (Timely response)'
Group.loc[Group['Timely response?'] == 'No', 'Label'] = Group['Company response to consumer'] + ' (Not Timely response)'
Group['Percentage'] = np.round(100 * Group['Count'] / Group['Count'].sum(), 2)
C = ['aquamarine', 'steelblue']  # fill color per "Consumer disputed?" value
SC = 'Navy'                      # bar outline color
fig = px.bar(Group, x='Percentage', y='Label', orientation='h',
             color='Consumer disputed?', text='Percentage', color_discrete_sequence=C, height=500)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1)
fig.update_traces(texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig['layout']['xaxis'].update(range=[0, 100])
# Fix: corrected the typo "Complains" -> "Complaints" in the chart title.
fig.update_layout(title="""Customers' Complaints by Companies""", plot_bgcolor='white')
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_layout(yaxis_title='Company Response to Consumer')
fig.show()
The object of the exercise here is to link the narrative of a complaint with the product. For this reason, we would like to combine text processing with a Keras artificial neural network (ANN) to create a predictive model.
def Data_info(Inp, Only_NaN = False):
    """Summarize a DataFrame: dtype, NaN count, and NaN percentage per column.

    Parameters
    ----------
    Inp : pd.DataFrame
        The frame to profile.
    Only_NaN : bool, default False
        If True, keep only the columns that contain at least one NaN.

    Returns
    -------
    pd.DataFrame
        Indexed by column name, sorted by dtype, with columns
        'Data Type', 'Number of NaN Values', and 'Percentage'.
    """
    dtype_table = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'])
    nan_table = Inp.isnull().sum().to_frame(name='Number of NaN Values')
    summary = dtype_table.join(nan_table, how='outer')
    summary['Percentage'] = np.round(100 * summary['Number of NaN Values'] / Inp.shape[0], 2)
    if Only_NaN:
        summary = summary.loc[summary['Number of NaN Values'] > 0]
    return summary
Data_info(Data)
| Data Type | Number of NaN Values | Percentage | |
|---|---|---|---|
| Company | object | 0 | 0.00 |
| Company public response | object | 976613 | 62.05 |
| Company response to consumer | object | 1 | 0.00 |
| Complaint ID | int64 | 0 | 0.00 |
| Consumer complaint narrative | object | 1052704 | 66.89 |
| Consumer consent provided? | object | 633534 | 40.25 |
| Consumer disputed? | object | 805362 | 51.17 |
| Date received | datetime64[ns] | 0 | 0.00 |
| Date sent to company | datetime64[ns] | 0 | 0.00 |
| Issue | object | 0 | 0.00 |
| Product | object | 0 | 0.00 |
| State | object | 26691 | 1.70 |
| Sub-issue | object | 566062 | 35.97 |
| Sub-product | object | 235165 | 14.94 |
| Submitted via | object | 0 | 0.00 |
| Tags | object | 1358856 | 86.34 |
| Timely response? | object | 0 | 0.00 |
| ZIP code | object | 149390 | 9.49 |
However, we need to focus only on Consumer complaint narrative and Product features. Therefore,
# Keep only the two columns the classifier needs and drop rows without a narrative.
df = Data[['Consumer complaint narrative', 'Product']].dropna(subset=['Consumer complaint narrative'])
display(df.head(5))
display(df.isnull().sum().to_frame('Number of NaN Values'))
| Consumer complaint narrative | Product | |
|---|---|---|
| 0 | transworld systems inc. \nis trying to collect... | Debt collection |
| 2 | I would like to request the suppression of the... | Credit reporting, credit repair services, or o... |
| 3 | Over the past 2 weeks, I have been receiving e... | Debt collection |
| 8 | XX/XX/2020 : Sent letter to original creditor ... | Credit reporting, credit repair services, or o... |
| 11 | I was sold access to an event digitally, of wh... | Money transfer, virtual currency, or money ser... |
| Number of NaN Values | |
|---|---|
| Consumer complaint narrative | 0 |
| Product | 0 |
First, we need to do text tokenization. This can be done using Keras preprocessing text tokenizer.
# Bag-of-words vectorizer limited to the 1000 most frequent tokens.
tokenize = keras.preprocessing.text.Tokenizer(num_words= 1000, char_level=False)
# Train and Test sets
train, test= train_test_split(df, test_size=0.3, random_state=42)
# X_train, X_test
# Fit the vocabulary on the training narratives only, then vectorize both splits
# into fixed-width (num_words) matrices.
tokenize.fit_on_texts(train['Consumer complaint narrative'])
X_train = tokenize.texts_to_matrix(train['Consumer complaint narrative'])
X_test = tokenize.texts_to_matrix(test['Consumer complaint narrative'])
# y_train, y_test
# Encode product names as integer class ids; fitting on the full column ensures
# both splits share one label mapping.
encoder = LabelEncoder()
encoder.fit(df['Product'])
y_train = encoder.transform(train['Product'])
y_test = encoder.transform(test['Product'])
Moreover, categorical variables can be converted into indicator variables using tf.keras.utils.to_categorical.
# One-hot encode the integer labels for the categorical-crossentropy loss.
num_classes = len(np.unique(y_train))
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
# Quick shape summary of the four resulting arrays.
pd.DataFrame(data={'Set':['X_train','X_test','y_train','y_test'],
'Shape':[X_train.shape, X_test.shape, y_train.shape, y_test.shape]}).set_index('Set').T
| Set | X_train | X_test | y_train | y_test |
|---|---|---|---|---|
| Shape | (364794, 1000) | (156341, 1000) | (364794, 18) | (156341, 18) |
For this study, we use the Sequential model, which is a linear stack of layers.
# Feed-forward classifier over the 1000-dim bag-of-words vectors.
model = keras.Sequential(name = "Keras_NLP_Model")
model.add(layers.Dense(512, input_dim= X_train.shape[1], kernel_initializer='uniform', activation='relu', name='Layer1'))
model.add(layers.Dense(128, kernel_initializer='uniform', activation='sigmoid', name='Layer2'))
model.add(layers.Dense(64, kernel_initializer='uniform', activation='sigmoid', name='Layer3'))
# Fix: the output layer must use 'softmax' (not 'sigmoid') for single-label
# multi-class classification with categorical_crossentropy, so the 18 class
# scores form a probability distribution that sums to 1.
model.add(layers.Dense(y_train.shape[1], kernel_initializer='uniform', activation='softmax', name='Layer4'))
model.summary()
keras.utils.plot_model(model, show_shapes=True, show_layer_names=True, expand_nested = True, rankdir = 'LR')
Model: "Keras_NLP_Model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= Layer1 (Dense) (None, 512) 512512 _________________________________________________________________ Layer2 (Dense) (None, 128) 65664 _________________________________________________________________ Layer3 (Dense) (None, 64) 8256 _________________________________________________________________ Layer4 (Dense) (None, 18) 1170 ================================================================= Total params: 587,602 Trainable params: 587,602 Non-trainable params: 0 _________________________________________________________________
# Number of training epochs.
IT = 6
# Track accuracy plus MAE/MSE (computed on the one-hot probability vectors).
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy', 'mae', 'mse'])
# Train on the training split, evaluating the held-out split after each epoch.
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=IT, batch_size=32, verbose=0)
def Search_List(Key, List): return [s for s in List if Key in s]
# Pretty display names for the Keras metric keys.
Metrics_Names = {'loss':'Loss', 'accuracy':'Accuracy', 'mae':'MAE', 'mse':'MSE'}
def Table_modify(df, Metrics_Names = Metrics_Names):
    """Rename metric columns to display names, sort the columns
    alphabetically, and prepend an 'Iteration' (epoch index) column."""
    renamed = df.rename(columns=Metrics_Names)
    renamed = renamed.reindex(sorted(renamed.columns), axis=1)
    renamed.insert(loc=0, column='Iteration',
                   value=np.arange(0, renamed.shape[0]), allow_duplicates=False)
    return renamed
# Split the fit history into validation ('val_'-prefixed) and train metric tables.
Validation_Table = Search_List('val_', history.history.keys())
Train_Table = list(set(history.history.keys()) - set(Validation_Table))
Validation_Table = pd.DataFrame(np.array([history.history[x] for x in Validation_Table]).T, columns=Validation_Table)
Train_Table = pd.DataFrame(np.array([history.history[x] for x in Train_Table]).T, columns=Train_Table)
Validation_Table.columns = [x.replace('val_', '') for x in Validation_Table.columns]
Train_Table = Table_modify(Train_Table)
Validation_Table = Table_modify(Validation_Table)
# Train Set Score
# Fix: the original evaluated X_test here but labelled it 'Train Set Score'
# (and evaluated X_train under 'Validation Set Score' below); the evaluated
# split now matches its label.
score = model.evaluate(X_train, y_train, batch_size=128, verbose=0)
score = pd.DataFrame(score, index=model.metrics_names).T
score.index = ['Train Set Score']
# Validation Set Score
Temp = model.evaluate(X_test, y_test, batch_size=128, verbose=0)
Temp = pd.DataFrame(Temp, index=model.metrics_names).T
Temp.index = ['Validation Set Score']
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
score = pd.concat([score, Temp])
score.rename(columns=Metrics_Names, inplace=True)
score = score.reindex(sorted(score.columns), axis=1)
display(score.style.set_precision(4))
| Accuracy | Loss | MAE | MSE | |
|---|---|---|---|---|
| Train Set Score | 0.7519 | 0.7860 | 0.0556 | 0.0556 |
| Validation Set Score | 0.7939 | 0.6554 | 0.0556 | 0.0556 |
def Plot_history(history, Title = False, Table_Rows = 25, YL = 1):
    """Plot per-epoch metric curves beside a sampled table of the same values.

    Parameters
    ----------
    history : pd.DataFrame
        Output of Table_modify — columns 'Iteration', 'Accuracy', 'Loss',
        'MAE', 'MSE'.
    Title : str or False
        Figure title; False suppresses the title block.
    Table_Rows : int
        Maximum number of rows sampled into the side table.
    YL : float
        Upper limit of the metric y-axis.
    """
    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02, column_widths=[0.6, 0.4],
                        specs=[[{"type": "scatter"},{"type": "table"}]])
    # Left
    # One line trace per metric, all sharing the Iteration x-axis.
    fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['Loss'].values,
                             line=dict(color='OrangeRed', width= 1.5), name = 'Loss'), 1, 1)
    fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['Accuracy'].values,
                             line=dict(color='MidnightBlue', width= 1.5), name = 'Accuracy'), 1, 1)
    fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['MAE'].values,
                             line=dict(color='ForestGreen', width= 1.5), name = 'Mean Absolute Error (MAE)'), 1, 1)
    fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['MSE'].values,
                             line=dict(color='purple', width= 1.5), name = 'Mean Squared Error (MSE)'), 1, 1)
    fig.update_layout(legend=dict(x=0, y=1.1, traceorder='reversed', font_size=12),
                      dragmode='select', plot_bgcolor= 'white', height=600, hovermode='closest',
                      legend_orientation='h')
    fig.update_xaxes(range=[history.Iteration.min(), history.Iteration.max()],
                     showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    fig.update_yaxes(range=[0, YL], showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    # Right
    # Sample at most Table_Rows evenly spaced epochs (always keeping the last
    # epoch) so the table stays readable for long runs.
    ind = np.linspace(0, history.shape[0], Table_Rows, endpoint = False).round(0).astype(int)
    ind = np.append(ind, history.Iteration.values[-1])
    history = history[history.index.isin(ind)]
    Temp = []
    for i in history.columns:
        Temp.append(history.loc[:,i].astype(float).round(4).values)
    fig.add_trace(go.Table(header=dict(values = list(history.columns), line_color='darkslategray',
                           fill_color='DimGray', align=['center','center'],
                           font=dict(color='white', size=12), height=25), columnwidth = [0.4, 0.4, 0.4, 0.4],
                           cells=dict(values=Temp, line_color='darkslategray', fill=dict(color=['WhiteSmoke', 'white']),
                           align=['center', 'center'], font_size=12,height=20)), 1, 2)
    if Title != False:
        fig.update_layout(plot_bgcolor= 'white',
                          title={'text': Title, 'x':0.46, 'y':0.94, 'xanchor': 'center', 'yanchor': 'top'},
                          yaxis_title='Frequency')
    fig.show()
def Confusion_Matrix(Model, X, y, Labels, FG = (14, 5)):
    """Draw raw and row-normalized confusion matrices side by side.

    Model  : fitted estimator exposing .predict
    X, y   : features and labels (y may be one-hot or a single column)
    Labels : tick labels applied to both axes of both heatmaps
    FG     : matplotlib figure size

    Returns the (fig, ax) pair for further customization.
    """
    fig, ax = plt.subplots(1, 2, figsize=FG)
    predictions = Model.predict(X)
    # One-hot labels collapse to class ids; single-column outputs are rounded.
    if y.shape[1] > 1:
        cm = confusion_matrix(y.argmax(axis=1), predictions.argmax(axis=1))
    else:
        cm = confusion_matrix(y, np.round(predictions))
    # Left panel: raw counts.
    sns.heatmap(cm.round(2), annot=True, annot_kws={"size": 14}, cmap="Blues", ax=ax[0])
    ax[0].set_xlabel('Predicted labels')
    ax[0].set_ylabel('True labels')
    ax[0].set_title('Confusion Matrix')
    ax[0].xaxis.set_ticklabels(Labels)
    ax[0].yaxis.set_ticklabels(Labels)
    # Right panel: each row normalized to proportions of the true class.
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    sns.heatmap(cm.round(2), annot=True, annot_kws={"size": 14}, cmap="Greens", ax=ax[1],
                linewidths=0.2, vmin=0, vmax=1, cbar_kws={"shrink": 1})
    ax[1].set_xlabel('Predicted labels')
    ax[1].set_ylabel('True labels')
    ax[1].set_title('Normalized Confusion Matrix')
    ax[1].xaxis.set_ticklabels(Labels)
    ax[1].yaxis.set_ticklabels(Labels)
    return fig, ax
def Plot_Classification(Model, X, y, Labels, BP = .5, Alpha=0.6, ax = False, fs = 7, ColorMap = 'Spectral'):
    """Draw a 2-D decision surface for Model with the points of X scattered on top.

    Parameters
    ----------
    Model    : fitted estimator exposing .predict; assumes 2-D inputs (X[:, 0], X[:, 1]).
    X, y     : 2-D feature array and labels (y may be one-hot or a single column).
    Labels   : legend labels for the classes.
    BP       : legend border padding.
    Alpha    : scatter point opacity.
    ax       : existing matplotlib axis, or False to create a new figure.
    fs       : figure size (square) when a new figure is created.
    ColorMap : colormap shared by the surface and the scatter.
    """
    h=0.02
    pad=0.25
    # adding margins
    x_min, x_max = X[:, 0].min()-pad, X[:, 0].max()+pad
    y_min, y_max = X[:, 1].min()-pad, X[:, 1].max()+pad
    # Generating meshgrids
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predictions
    # Predict the class for every grid point to shade the decision regions.
    Pred = Model.predict(np.c_[xx.ravel(), yy.ravel()])
    if y.shape[1] > 1:
        Pred = Pred.argmax(axis = 1)
    Pred = Pred.reshape(xx.shape)
    # Figure
    # NOTE(review): `ax == False` relies on the caller passing the literal False;
    # `ax is False` would be the safer identity test — confirm before changing.
    if ax == False:
        fig, ax = plt.subplots(1, 1, figsize=(fs, fs))
    _ = ax.contourf(xx, yy, Pred, cmap = ColorMap, alpha=0.2)
    # Collapse one-hot labels to class ids for coloring the scatter.
    if y.shape[1] > 1:
        y = y.argmax(axis = 1)
    else:
        y = np.round(y).T[0]
    scatter = ax.scatter(X[:,0], X[:,1], s=70, c=y, edgecolor = 'Navy', alpha = Alpha, cmap = ColorMap)
    _ = ax.legend(handles=scatter.legend_elements()[0], labels= Labels,
                  fancybox=True, framealpha=1, shadow=True, borderpad=BP, loc='best', fontsize = 14)
    _ = ax.set_xlim(x_min, x_max)
    _ = ax.set_ylim(y_min, y_max)
    _ = ax.set_xlabel(r'$X_1$')
    _ = ax.set_ylabel(r'$X_2$')
# Render the metric curves and tables for both splits.
Plot_history(Train_Table, Title = 'Train Set', YL = 1.2)
Plot_history(Validation_Table, Title = 'Validation Set')
Testing the results.
# Spot-check: predict the product for ten random test narratives and print
# the truncated narrative next to the actual and predicted labels.
Ind = np.random.randint(X_test.shape[0], size=10)
for i in Ind:
    Pred = model.predict(np.array([X_test[i]]))
    Pred = encoder.classes_[np.argmax(Pred)]
    snippet = test['Consumer complaint narrative'].values[i][:80]
    actual = test['Product'].values[i]
    print(f"{Back.MAGENTA}{Fore.BLACK}{Style.BRIGHT}Narrative:{Style.RESET_ALL} {snippet} ...")
    print(f"{Back.CYAN}{Fore.BLACK}{Style.BRIGHT}Label (Actual):{Style.RESET_ALL} {actual}")
    print(f"{Back.GREEN}{Fore.BLACK}{Style.BRIGHT}Label (Predicted):{Style.RESET_ALL} {Pred}")
    print('---------------------------------------')
del Ind, Pred
Narrative: Carrington Mortgage Services failed to send my billing statement after purchasin ... Label (Actual): Mortgage Label (Predicted): Mortgage --------------------------------------- Narrative: XXXX XXXX XXXX XXXX SETERUS, INC has their insurance agency XXXX XXXX sending mu ... Label (Actual): Mortgage Label (Predicted): Debt collection --------------------------------------- Narrative: The XXXX amount owed is not my debt. XX/XX/15 ... Label (Actual): Debt collection Label (Predicted): Debt collection --------------------------------------- Narrative: I mailed a certified handwritten letter to the Doctors Business Bureau on XX/XX/ ... Label (Actual): Debt collection Label (Predicted): Debt collection --------------------------------------- Narrative: Equifax mortgage solutions continues to report incorrect information on my credi ... Label (Actual): Credit reporting Label (Predicted): Credit reporting, credit repair services, or other personal consumer reports --------------------------------------- Narrative: THIS IS IN REFERENCE TO MY ORIGINAL COMPLAINT WITH GREAT LAKES # XXXX, I am appe ... Label (Actual): Credit reporting, credit repair services, or other personal consumer reports Label (Predicted): Student loan --------------------------------------- Narrative: This account is inaccurately reporting on my credit report : XXXX XXXX ... Label (Actual): Credit reporting, credit repair services, or other personal consumer reports Label (Predicted): Credit reporting, credit repair services, or other personal consumer reports --------------------------------------- Narrative: They stated three different dates when a personal check would be cashed. There i ... Label (Actual): Checking or savings account Label (Predicted): Checking or savings account --------------------------------------- Narrative: XX/XX/2020 i told social security i wanted to get my benefits on the directexpre ... 
Label (Actual): Credit card or prepaid card Label (Predicted): Credit card or prepaid card --------------------------------------- Narrative: On Wednesday, XXXX XXXX, leased a new Toyota Sienna. I called Trans Union and " ... Label (Actual): Credit reporting Label (Predicted): Credit reporting ---------------------------------------